Fisher test by module
Level 3 of clusters. We keep only the modules with more than 30 nodes for downstream analysis.
# Network clusters
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_net_MF/"
fea_dir = "/pastel/Github_scripts/SpeakEasy_dlpfc/bulk_dlpfc/FEA_analysis/fisher_heatmap/"
modules_file = read.table(paste0(net_dir, "geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
too_small = as.character( modules_size$module[modules_size$n_nodes < 30]) # Get modules < 30 nodes to be removed
net_output = modules_file %>% filter(! cluster_lv3 %in% as.integer(too_small)) # the clusters with at least 30 nodes
createDT(net_output)Universe tested:
## [1] 17258
Pvalue adjusted by Bonferroni
# Gene lists. These files must contain 2 columns, with header: 1) symbol, 2) ensembl
path_to_files = "/pastel/resources/gene_lists/mynd_lists/"
geneList_file_names = c("antigen_presentation_GO.txt",
"apoptosis_GO.txt",
"autophagy_GO.txt",
"cell_proliferation_GO.txt",
"DNA_metabolic_GO.txt",
"DNA_methylation_GO.txt",
"DNA_repair_GO.txt",
"DNA_replication_GO.txt",
"endosomal_transport_GO.txt",
"exocytosis_GO.txt",
"glucose_metabolism_GO.txt",
"IFNb_response_GO.txt",
"IFNg_response_GO.txt",
"inflammatory_resp_GO.txt",
"lipid_metabolism_GO.txt",
"lysosome_GO.txt",
"macroautophagy_GO.txt",
"mitochondria_GO.txt",
"neutrophil_activation_GO.txt",
"phagocytosis_GO.txt",
"protein_ubiquitinization_GO.txt",
"proteolysis_GO.txt",
"response_cytokine_GO.txt",
"ribosome_GO.txt",
"RNA_splicing_GO.txt",
"translation_GO.txt",
"vesicle_med_transp_GO.txt",
"viral_response_GO.txt")
genes_universe = net_output$ensembl
cluster_lables = net_output$cluster_lv3
geneAnnotation_lists = parseGeneLists(path_to_files, geneList_file_names, genes_universe)
gene_cluster_enrich = moduleEnrich(genes_universe, cluster_lables, geneAnnotation_lists)
p = plot_module_enrichment_heatmap(cluster_lables, gene_cluster_enrich, plot_title = "Module enrichment", filter_pval = 1, bonf.adj.pval = F)
# pdf(file = paste0(fea_dir, "h_fisher_MFlevel3_GO_bonf.pdf"), width = 18, height = 17)
# p
# dev.off()
pPvalue adjusted by Bonferroni.
Cell type markers from Johnson et al, 2022 , plus m109 from Mostafavi et al, 2018. GWAS AD from Bellenguez et al, 2022. Plaque-induced gene list - PIG from Chen et al, 2020.
# Gene lists. These files must contain 2 columns, with header: 1) symbol, 2) ensembl
path_to_files = "/pastel/resources/gene_lists/celltype_markers/"
lists_dir = "/pastel/resources/gene_lists/lists4heatmap/"
geneList_file_names = c("astrocytes.txt",
"microglia.txt",
"neuron.txt",
"oligodendrocytes.txt",
"endothelia.txt",
"m109_mostafavi.txt",
"ROSMAP_up_AD.txt",
"ROSMAP_down_AD.txt",
"GWAS_AD_Bel2022.txt",
"PIG_orthologs.txt")
genes_universe = net_output$ensembl
cluster_lables = net_output$cluster_lv3
geneAnnotation_lists = parseGeneLists(lists_dir, geneList_file_names, genes_universe)
gene_cluster_enrich = moduleEnrich(genes_universe, cluster_lables, geneAnnotation_lists)
p = plot_module_enrichment_heatmap(cluster_lables, gene_cluster_enrich, plot_title = "Module enrichment", filter_pval = 1, bonf.adj.pval = F)
# pdf(file = paste0(fea_dir, "h_fisher_MFlevel3_celltype_pig.pdf"), width = 8, height = 15)
# p
# dev.off()
# save(gene_cluster_enrich, file = paste0(fea_dir, "Enrichment_res.Rdata"))
p## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ComplexHeatmap_2.15.4 circlize_0.4.16 lubridate_1.9.3
## [4] forcats_1.0.0 stringr_1.5.1 purrr_1.0.2
## [7] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
## [10] ggplot2_3.5.1 tidyverse_2.0.0 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.13 png_0.1-8 digest_0.6.36
## [4] foreach_1.5.2 utf8_1.2.4 R6_2.5.1
## [7] stats4_4.1.2 evaluate_0.24.0 highr_0.11
## [10] pillar_1.9.0 GlobalOptions_0.1.2 rlang_1.1.4
## [13] rstudioapi_0.16.0 jquerylib_0.1.4 magick_2.8.4
## [16] S4Vectors_0.32.4 GetoptLong_1.0.5 DT_0.33
## [19] rmarkdown_2.27 htmlwidgets_1.6.4 munsell_0.5.1
## [22] compiler_4.1.2 xfun_0.46 pkgconfig_2.0.3
## [25] BiocGenerics_0.40.0 shape_1.4.6.1 htmltools_0.5.8.1
## [28] tidyselect_1.2.1 IRanges_2.28.0 codetools_0.2-18
## [31] matrixStats_1.3.0 fansi_1.0.6 crayon_1.5.3
## [34] tzdb_0.4.0 withr_3.0.0 jsonlite_1.8.8
## [37] gtable_0.3.5 lifecycle_1.0.4 magrittr_2.0.3
## [40] scales_1.3.0 cli_3.6.3 stringi_1.8.4
## [43] cachem_1.1.0 doParallel_1.0.17 bslib_0.8.0
## [46] generics_0.1.3 vctrs_0.6.5 rjson_0.2.21
## [49] RColorBrewer_1.1-3 Cairo_1.6-2 iterators_1.0.14
## [52] tools_4.1.2 glue_1.7.0 hms_1.1.3
## [55] crosstalk_1.2.1 parallel_4.1.2 fastmap_1.2.0
## [58] yaml_2.3.10 timechange_0.3.0 clue_0.3-65
## [61] colorspace_2.1-1 cluster_2.1.2 knitr_1.48
## [64] sass_0.4.9